/*
-- IOTA Crypto Core
--
-- 2018 by Thomas Pototschnig <microengineer18@gmail.com>
-- discord: pmaxuw#8292
-- https://gitlab.com/iccfpga-rv
--
-- Permission is hereby granted, free of charge, to any person obtaining
-- a copy of this software and associated documentation files (the
-- "Software"), to deal in the Software without restriction, including
-- without limitation the rights to use, copy, modify, merge, publish,
-- distribute, sublicense, and/or sell copies of the Software, and to
-- permit persons to whom the Software is furnished to do so, subject to
-- the following conditions:
-- 
-- The above copyright notice and this permission notice shall be
-- included in all copies or substantial portions of the Software.
-- 
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include <fpga/fpga.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include "fpga/troika.h"

#include "debugprintf.h"



// Geometry of the Troika state: SLICES slices of ROWS x COLUMNS trits each.
#define COLUMNS 9
#define ROWS 3
#define SLICES 27
// Derived sizes. Each expansion is parenthesized so that the macro behaves as a
// single value inside larger expressions (e.g. `x / STATESIZE`, `x % SLICESIZE`).
#define SLICESIZE (COLUMNS*ROWS)
#define STATESIZE (COLUMNS*ROWS*SLICES)
#define NUM_SBOXES (SLICES*ROWS*COLUMNS/3)

// Trit value that TroikaAbsorb writes directly behind the last message trit.
#define PADDING 0x1

// Not referenced in this part of the file.
#define REG_FLAGS	0x01

// GCC function attribute requesting -O3 for a single function, independent of
// the optimization level the rest of the translation unit is built with.
#define O3 __attribute__((optimize("-O3")))

// Byte-lane enables for writes to the pidiver registers are not implemented on
// the FPGA. At -O0 the compiler emits byte accesses, which leads to incorrect
// results - so a higher optimization level is forced for the conversion code.

// Packs the first `len` trits found at `trits` into one 32-bit word, two bits
// per trit: trit i ends up in bits 2*i and 2*i+1 of the result.
//
// trits: start of the trit data; the pointer is reinterpreted as a Trit array.
// len:   number of trits to pack. Callers in this file also use a one-argument
//        form, so a default must be declared in the class header (not visible
//        here; presumably 16 - TODO confirm). More than 16 trits do not fit.
// Returns the packed word; bits above 2*len are zero for trit values 0..3.
__attribute__((optimize("unroll-loops")))
inline O3 uint32_t TroikaFPGA::troikaStateToBCT(uint32_t* trits, int len) {
	uint32_t bct = 0;
	const Trit* ptrits = (const Trit*) trits;
	for (int i=0;i<len;i++) {
		// Widen to uint32_t before shifting. If Trit is a narrow integer type
		// (its definition is not visible here) it would otherwise be promoted
		// to a signed int, and a value >= 2 shifted by 30 bits (i == 15) does
		// not fit into that type.
		bct |= static_cast<uint32_t>(ptrits[i]) << (2*i);
	}
	return bct;
}

// Unpacks `len` two-bit fields from `bct` into individual trits.
// Field k (bits 2*k and 2*k+1 of `bct`) is stored as the k-th Trit of the
// memory `trits` points to; the pointer is reinterpreted as a Trit array.
// At most 16 fields exist in a 32-bit word.
__attribute__((optimize("unroll-loops")))
inline O3 void TroikaFPGA::troikaBCTToState(uint32_t bct, uint32_t* trits, int len) {
	Trit* out = (Trit*) trits;
	uint32_t packed = bct;
	for (int k=0;k<len;k++) {
		// lowest two bits are the current trit, then move on to the next field
		out[k] = packed & 0x3;
		packed >>= 2;
	}
}


// Converts `size` trits at `trits` into the packed two-bits-per-trit form and
// stores the result at `state_bct`.
//
// Every complete group of 16 trits produces one full destination word. A
// trailing partial group of `size % 16` trits replaces only the low
// 2*(size % 16) bits of the following word; the upper bits of that word are
// kept as they are.
//
// state_bct: destination words (callers in this file pass the FPGA data write
//            pointer).
// trits:     source trit data, handed over as uint32_t* and read 16 trits
//            (four words) per step.
// size:      number of trits to convert; expected to be non-negative.
O3 void TroikaFPGA::troikaConvertState(uint32_t* state_bct, uint32_t* trits, int size)  {
	uint32_t* dst = state_bct;
	uint32_t* src = trits;
	for (int i=0;i<size/16;i++) {
		*dst = troikaStateToBCT(src);
		dst++;
		src += 4;	// 16-trit-wise
	}
	uint32_t rem = size % 16;
	if (rem == 0) {
		// No partial group: the next destination word is not covered by `size`
		// and must neither be read nor rewritten.
		return;
	}
	uint32_t mask = (1u<<(2*rem))-1;
	*dst &= ~mask;
	*dst |= troikaStateToBCT(src, rem) & mask;
}

// Converts `size` trits from the packed two-bits-per-trit form at `state_bct`
// back into individual trits at `trits`.
//
// Every complete source word yields 16 trits (four destination words). If
// `size` is not a multiple of 16, the remaining `size % 16` trits are taken
// from the low bits of the next source word.
//
// trits:     destination trit buffer, handed over as uint32_t*.
// state_bct: packed source words (callers in this file pass the FPGA read
//            pointer).
// size:      number of trits to produce; expected to be non-negative.
O3 void TroikaFPGA::troikaConvertStateBack(uint32_t* trits, uint32_t* state_bct, int size) {
	uint32_t* dst = trits;
	uint32_t* src = state_bct;
	for (int i=0;i<size/16;i++) {
		troikaBCTToState(*src, dst);
		src++;
		dst += 4;
	}
	uint32_t rem = size % 16;
	if (rem != 0) {
		// Only touch the next source word when it really holds trits; with a
		// size that is a multiple of 16 it is not covered by `size`.
		troikaBCTToState(*src, dst, rem);
	}
}

// Debug helper: prints a "----" separator followed by 27 lines of 27
// characters, one character per trit, covering state[0] .. state[728].
// Each trit value is shown as the character with code value+48 (48 is ASCII '0').
void xprint(Trit *state) {
	char line[28]={0};	// 27 characters plus the terminating zero
	debugPrintf("----\n");
	const Trit* row = state;
	for (int y=0;y<27;y++, row+=27) {
		for (int x=0;x<27;x++) {
			line[x] = row[x]+48;
		}
		debugPrintf("%s\n", line);
	}
}

// Runs the permutation on the FPGA and blocks until it has finished.
//
// Sequence: write the round count, write FLAG_TROIKA_LOAD_RATE, write
// FLAG_TROIKA_START, then poll the flag register until FLAG_TROIKA_RUNNING
// is no longer set. There is no timeout on the poll.
// NOTE(review): FLAG_TROIKA_LOAD_RATE presumably transfers the previously
// written data buffer into the rate part of the state - confirm against the
// FPGA register documentation.
void TroikaFPGA::TroikaPermutation(uint32_t num_rounds) {
	FPGA::pidiverWrite(CMD_WRITE_TROIKA_NUM_ROUNDS, num_rounds);
	FPGA::pidiverWrite(CMD_WRITE_FLAGS, FLAG_TROIKA_LOAD_RATE);
	FPGA::pidiverWrite(CMD_WRITE_FLAGS, FLAG_TROIKA_START);

	// busy-wait for completion
	for (;;) {
		if (!(FPGA::pidiverRead(CMD_READ_FLAGS) & FLAG_TROIKA_RUNNING)) {
			break;
		}
	}
}

uint32_t* TroikaFPGA::getAlignedMessage(const Trit* message, int size) {
	uint32_t* mp;
	if ((uint32_t) message & 0x3) {	// not 32bit aligned ... copy
		memcpy(m_message, message, size);
		mp = m_message;
	} else {
		mp = (uint32_t*) message;
	}
	return mp;
}


void TroikaFPGA::TroikaAbsorb(unsigned int rate, const Trit *message, uint32_t message_length, uint32_t num_rounds) {
	while (message_length >= rate) {
		uint32_t* mp = getAlignedMessage(message, rate);
		troikaConvertState((uint32_t*) FPGA::pidiverGetDataWritePtr(), mp, rate);

		TroikaPermutation(num_rounds);
		message_length -= rate;
		message += rate;
	}

	// todo make it better than this ...
	memset(m_message, 0, rate);
	memcpy(m_message, message, message_length);

	((Trit*) m_message)[message_length] = PADDING;
	troikaConvertState((uint32_t*) FPGA::pidiverGetDataWritePtr(), m_message, rate);
}

// Squeezes `hash_length` trits out of the FPGA state into `hash`.
//
// For every block, a permutation with `num_rounds` rounds is run first, then
// `rate` trits are read back from the FPGA into m_message and copied to the
// output. The last block may be shorter than `rate`; in that case only the
// trits that are still missing are copied, so exactly `hash_length` trits are
// written to `hash` in total.
//
// hash:        output buffer with room for `hash_length` trits.
// hash_length: number of trits to produce.
// rate:        block size in trits; must not be zero.
// num_rounds:  rounds per permutation.
void TroikaFPGA::TroikaSqueeze(Trit *hash, uint32_t hash_length, unsigned int rate, uint32_t num_rounds) {
	while (hash_length >= rate) {
		TroikaPermutation(num_rounds);
		troikaConvertStateBack(m_message, (uint32_t*) FPGA::troikaReadDataPtr(), rate);
		memcpy(hash, m_message, rate);
		hash += rate;
		hash_length -= rate;
	}

	// Check if there is a last incomplete block. After the loop hash_length is
	// smaller than rate, so it is exactly the number of trits still missing.
	if (hash_length > 0) {
		TroikaPermutation(num_rounds);
		troikaConvertStateBack(m_message, (uint32_t*) FPGA::troikaReadDataPtr(), rate);
		// Copy only what is left; copying a full `rate` block here would write
		// past the end of the caller's output buffer.
		memcpy(hash, m_message, hash_length);
	}
}

// Hashes `inlen` trits from `in` into `outlen` trits at `out`, using
// `num_rounds` rounds per permutation and TROIKA_RATE as block size.
//
// The FPGA state is reset and m_stateCompressed is zeroed before absorbing.
// The lengths and the round count are handed on as 32-bit values, i.e. they
// are reduced to their low 32 bits.
void TroikaFPGA::TroikaVarRounds(Trit *out, unsigned long long outlen, const Trit *in, unsigned long long inlen,
		unsigned long long num_rounds) {
	const uint32_t rounds = static_cast<uint32_t>(num_rounds);
	const uint32_t inputTrits = static_cast<uint32_t>(inlen);
	const uint32_t outputTrits = static_cast<uint32_t>(outlen);

	// always reset state
	FPGA::pidiverWrite(CMD_WRITE_FLAGS, FLAG_TROIKA_RESET);

	memset(m_stateCompressed, 0, sizeof(m_stateCompressed));

	TroikaAbsorb(TROIKA_RATE, in, inputTrits, rounds);
	TroikaSqueeze(out, outputTrits, TROIKA_RATE, rounds);
}

// Hashes `inlen` trits from `in` into `outlen` trits at `out` with the
// standard round count NUM_ROUNDS; see TroikaVarRounds for the details.
void TroikaFPGA::doTroika(Trit *out, unsigned long long outlen, const Trit *in, unsigned long long inlen) {
	const unsigned long long rounds = NUM_ROUNDS;
	TroikaVarRounds(out, outlen, in, inlen, rounds);
}

bool TroikaFPGA::memoryLoopTest() {
	uint32_t input[FPGA_TROIKA_STATESIZE];
	uint32_t output[FPGA_TROIKA_STATESIZE];
	volatile uint32_t* pidiverWritePtr = FPGA::pidiverGetDataWritePtr();
	volatile uint32_t* troikaReadPtr = FPGA::troikaReadDataPtr();

	for (int i=0;i<FPGA_TROIKA_STATESIZE;i++) {
		input[i] = rand();
		if (i == FPGA_TROIKA_STATESIZE-1) {
			input[i] &= 0x0003ffff;	// last bits physically not present in hardware
		}
		pidiverWritePtr[i] = input[i];
	}

	FPGA::pidiverWrite(CMD_WRITE_FLAGS, FLAG_TROIKA_RESET);
	FPGA::pidiverWrite(CMD_WRITE_FLAGS, FLAG_TROIKA_LOAD_RATE | FLAG_TROIKA_LOAD_CAPACITY);

	memset(output, 0, sizeof(output));
	for (int i=0;i<FPGA_TROIKA_STATESIZE;i++) {
		output[i] = troikaReadPtr[i];
		if (i == FPGA_TROIKA_STATESIZE-1) {
			output[i] &= 0x0003ffff;	// last bits physically not present in hardware
		}
	}

	for (int i=0;i<FPGA_TROIKA_STATESIZE;i++) {
		if (input[i] != output[i])
		{
			debugPrintf("mismatch! %d %08x %08x", i, input[i], output[i]);
			return false;
		}
	}
	return true;
}












